# "Media's Influence on LGBTQ Support Across Africa" by Stephen Winkler

# Code to run main models and create:
#   - Table 2 in Main Text (Logit results)
#   - Table A.3 in Appendix (OLS results)
#   - Table A.4 in Appendix (Probit results)

rm(list=ls()) # clear environment
source("Rprofile.R") # setup Rprofile (load packages, etc.)

# Load data
myData <- readRDS("data/afrobarometer.rds")

# Make sure all explanatory variables are numeric:
myData$radio <- as.numeric(myData$radio)
myData$tv <- as.numeric(myData$tv)
myData$newspaper <- as.numeric(myData$newspaper)
myData$internet <- as.numeric(myData$internet)
myData$social_media <- as.numeric(myData$social_media)
myData$media_aggregate <- as.numeric(myData$media_aggregate)
myData$media_noradio <- as.numeric(myData$media_noradio)
myData$media_notv <- as.numeric(myData$media_notv)
myData$media_nopaper <- as.numeric(myData$media_nopaper)
myData$media_nointernet <- as.numeric(myData$media_nointernet)
myData$media_nosocialmedia <- as.numeric(myData$media_nosocialmedia)

#####################
# Logit & OLS Models
#####################

# Model 1: Media Aggregate (Controls + CFE + DCSE)
model <- 
  sexuality2 ~
  media_aggregate +
  tolerance_noLGBT2 +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE, #only extract data used in model
                     extra = ~respno + district) #add respno & district
logit.result.1.all <- glm(model, family = binomial, data = mdata)
logit.result.1.all.robust <- coeftest(logit.result.1.all, vcov. = function(x) cluster.vcov(x, ~ district))
lm.result.1.all <- lm(model, data = mdata)
lm.result.1.all.robust <- coeftest(lm.result.1.all, vcov. = function(x) cluster.vcov(x, ~ district))

# Model 2: Media Types (Control other media + Controls + CFE + DCSE)

# Radio
model <-
  sexuality2 ~
  radio +
  media_noradio +
  tolerance_noLGBT2 +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE, #only extract data used in model
                     extra = ~respno + district) #add respno & district
logit.result.2.radio <- glm(model, family = binomial, data = mdata)
logit.result.2.radio.robust <- coeftest(logit.result.2.radio, vcov. = function(x) cluster.vcov(x, ~ district))
lm.result.2.radio <- lm(model, data = mdata)
lm.result.2.radio.robust <- coeftest(lm.result.2.radio, vcov. = function(x) cluster.vcov(x, ~ district))

# TV
model <-
  sexuality2 ~
  tv + 
  media_notv +
  tolerance_noLGBT2 +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE, #only extract data used in model
                     extra = ~respno + district) #add respno & district
logit.result.2.tv <- glm(model, family = binomial, data = mdata)
logit.result.2.tv.robust <- coeftest(logit.result.2.tv, vcov. = function(x) cluster.vcov(x, ~ district))
lm.result.2.tv <- lm(model, data = mdata)
lm.result.2.tv.robust <- coeftest(lm.result.2.tv, vcov. = function(x) cluster.vcov(x, ~ district))

# Newspaper
model <-
  sexuality2 ~
  newspaper +
  media_nopaper + 
  tolerance_noLGBT2 +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE, #only extract data used in model
                     extra = ~respno + district) #add respno & district
logit.result.2.newspaper <- glm(model, family = binomial, data = mdata)
logit.result.2.newspaper.robust <- coeftest(logit.result.2.newspaper, vcov. = function(x) cluster.vcov(x, ~ district))
lm.result.2.newspaper <- lm(model, data = mdata)
lm.result.2.newspaper.robust <- coeftest(lm.result.2.newspaper, vcov. = function(x) cluster.vcov(x, ~ district))

# Internet
model <-
  sexuality2 ~
  internet +
  media_nointernet +
  tolerance_noLGBT2 +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban + 
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE, #only extract data used in model
                     extra = ~respno + district) #add respno & district
logit.result.2.internet <- glm(model, family = binomial, data = mdata)
logit.result.2.internet.robust <- coeftest(logit.result.2.internet, vcov. = function(x) cluster.vcov(x, ~ district))
lm.result.2.internet <- lm(model, data = mdata)
lm.result.2.internet.robust <- coeftest(lm.result.2.internet, vcov. = function(x) cluster.vcov(x, ~ district))

# Social Media
model <-
  sexuality2 ~
  social_media +
  media_nosocialmedia +
  tolerance_noLGBT2 +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE, #only extract data used in model
                     extra = ~respno + district) #add respno & district
logit.result.2.social_media <- glm(model, family = binomial, data = mdata)
logit.result.2.social_media.robust <- coeftest(logit.result.2.social_media, vcov. = function(x) cluster.vcov(x, ~ district))
lm.result.2.social_media <- lm(model, data = mdata)
lm.result.2.social_media.robust <- coeftest(lm.result.2.social_media, vcov. = function(x) cluster.vcov(x, ~ district))

# Generate Latex tables with stargazer
# First, generate logit result table: Table 2 in Main text
stargazer(logit.result.1.all.robust, logit.result.2.radio.robust, logit.result.2.tv.robust,
          logit.result.2.newspaper.robust, logit.result.2.internet.robust, logit.result.2.social_media.robust,
          type = "latex",
          no.space=TRUE, 
          dep.var.caption = "DV: Homosexual as Neighbor (0: dislike, 1: don't care/like)",
          dep.var.labels.include = FALSE,
          title = "Effect of Media Consumption on LGBT Attitudes (Logit Models)",
          omit = "country",
          keep.stat = c("n", "AIC"),
          notes = c("All models include country fixed effects. Standard errors are clustered at the district level.")
)

# Then, OLS results table: Table A.3 in Appendix 
stargazer(lm.result.1.all.robust, lm.result.2.radio.robust, lm.result.2.tv.robust, lm.result.2.newspaper.robust,
          lm.result.2.internet.robust, lm.result.2.social_media.robust,
          no.space=TRUE, 
          dep.var.caption = "DV: Homosexual as Neighbor (0: dislike, 1: don't care/like)",
          dep.var.labels.include = FALSE,
          #column.labels=c("Logit", ""), # "Pol Herf"),
          title = "Effect of Media Consumption on LGBT Attitudes (OLS Models)",
          omit= "country",
          notes = c("All models include country fixed effects. Standard errors are clustered at the district level.")
)
# NOTE: coeftest is the command I use to get results with district-clustered errors.
# This command doesn't output # of observations, AIC, or R2. Therefore, I manually added this information 
# to the tables. 

########################
# Ordered Probit Models: 
########################
# Same model except DV is ordered instead of binary
myData$sexuality <- as.numeric(myData$sexuality)
myData$sexuality <- as.factor(myData$sexuality) #make sure DV is factor
levels(myData$sexuality) # confirm levels 
# Remove countries where question was not asked because they aren't getting dropped from mdata function
myData <- myData %>% filter(country != "Algeria")
myData <- myData %>% filter(country != "Egypt")
myData <- myData %>% filter(country != "Sudan")

# Function to cluster standard errors
c1   <- function(data,fm, cluster){ 
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <-ncol(data)-1 +length(unique(data[,]))
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

# Model 1: Media Aggregate (Controls + CFE + DCSE)
model <- 
  sexuality ~
  media_aggregate +
  tolerance_noLGBT +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE)
probit.result1 <- polr(model,
                       data = mdata,
                       method = "probit",
                       Hess = TRUE)

# Apply cluster function
obs2 <- as.numeric(rownames(mdata, probit.result1, myData$district)) #forumula here is data, model, cluster. So in this example I am clustering on District
probit.result1.cluster <- c1(mdata, probit.result1, myData$district[obs2]) 

# Model 2: Media Types (Control other media + Controls + CFE + DCSE)

# Radio
model <-
  sexuality ~
  as.numeric(radio) + 
  media_noradio +
  tolerance_noLGBT +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE) 
probit.result2 <- polr(model,
                       data = mdata,
                       method = "probit",
                       Hess = TRUE)
# Apply cluster function
obs2 <- as.numeric(rownames(mdata, probit.result2, myData$district)) #forumula here is data, model, cluster. So in this example I am clustering on District
probit.result2.cluster <- c1(mdata, probit.result2, myData$district[obs2]) 

# TV
model <-
  sexuality ~
  as.numeric(tv) + 
  media_notv +
  tolerance_noLGBT +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE)
probit.result3 <- polr(model,
                       data = mdata,
                       method = "probit",
                       Hess = TRUE)
# Apply cluster function
obs2 <- as.numeric(rownames(mdata, probit.result3, myData$district)) #forumula here is data, model, cluster. So in this example I am clustering on District
probit.result3.cluster <- c1(mdata, probit.result3, myData$district[obs2]) 

# Newspaper
model <-
  sexuality ~
  newspaper +
  media_nopaper + 
  tolerance_noLGBT +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE)
probit.result4 <- polr(model,
                       data = mdata,
                       method = "probit",
                       Hess = TRUE)
# Apply cluster function
obs2 <- as.numeric(rownames(mdata, probit.result4, myData$district)) #forumula here is data, model, cluster. So in this example I am clustering on District
probit.result4.cluster <- c1(mdata, probit.result4, myData$district[obs2]) 

# Internet
model <-
  sexuality ~
  internet +
  media_nointernet +
  tolerance_noLGBT +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)

mdata <- extractdata(model, myData, na.rm=TRUE)
probit.result5 <- polr(model,
                       data = mdata,
                       method = "probit",
                       Hess = TRUE)
# Apply cluster function
obs2 <- as.numeric(rownames(mdata, probit.result5, myData$district)) #forumula here is data, model, cluster. So in this example I am clustering on District
probit.result5.cluster <- c1(mdata, probit.result5, myData$district[obs2]) 

# Social Media
model <-
  sexuality ~
  social_media +
  media_nosocialmedia +
  tolerance_noLGBT +
  female + as.numeric(education) + as.numeric(religiosity) + as.numeric(age) + as.numeric(income_water) + urban +
  as.factor(country)
mdata <- extractdata(model, myData, na.rm=TRUE)
probit.result6 <- polr(model,
                       data = mdata,
                       method = "probit",
                       Hess = TRUE)

# Apply cluster function
obs2 <- as.numeric(rownames(mdata, probit.result6, myData$district)) #forumula here is data, model, cluster. So in this example I am clustering on District
probit.result6.cluster <- c1(mdata, probit.result6, myData$district[obs2]) 

# Generate Latex table: Table A.4 in Appendix 
stargazer(probit.result1.cluster, probit.result2.cluster, probit.result3.cluster, probit.result4.cluster,
          probit.result5.cluster, probit.result6.cluster,
          no.space=TRUE, 
          label = "probit_models",
          summary.stat = c("n"), nobs = TRUE,
          dep.var.caption = "DV: Homosexual as Neighbor (Ordinal from Strong Dislike to Strong Like)",
          dep.var.labels.include = FALSE,
          #column.labels=c("Logit", ""), # "Pol Herf"),
          title = "Effect of Media Consumption on LGBT Attitudes (Ordered Probit Models)",
          omit= "country",
          notes = c("All models include country fixed effects. Standard errors are clustered at the district level.")
)